import csv
import os
import sys
import scipy.sparse
import numpy as np
import sklearn
from sklearn.metrics.pairwise import cosine_similarity
import pymysql
import re
import math
import time
from functools import reduce
print("please wait patiently...")

# Boilerplate fragments that getChinese() removes from a raw name field before
# the name is used: column labels (e.g. supplier name, contact phone), price and
# score headers, lot/package markers and placeholder texts.
# Several entries contain shorter entries (e.g. "（元）" contains "元").
delete_list = ["供应商名称","中标金额","元","包名称","联系电话","详见公示","二包","标的名称",
               "统一社会信用代码：","最终报价","最后报价金额","（元）","投标总报价","（万元）","包一","包二","包三",
               "企业类型","投标价","成交价","包号","单价","总价","合同包","（人民币元）","（人民币：元）","评审结果",
               "最后得分","最后磋商报价","最后成交总报价","最后报价","（下浮率）","组织机构代码","证号","总得分","得分","综合得分","详见公告正文","某单位"]

# regions = ["辽宁","吉林","黑龙江","河北","山西","陕西",
#           "甘肃","青海","山东","安徽","江苏","浙江",
#            "河南","湖北","湖南","江西","台湾","福建",
#            "云南","海南","四川","贵州","广东",
#            "内蒙古","新疆","广西","西藏","宁夏",
#            "北京","上海","天津","重庆","香港","澳门"]#要加上”市“、”省“等等

#regions = ["重庆市","上海市"]




def getChinese(str1, delete_contents=None):
    """Normalise a raw name field taken from a notice row.

    Every fragment in *delete_contents* is removed from *str1*.  If the
    remainder still contains both a full-width opening and closing
    parenthesis it is returned unchanged; otherwise only the characters in
    the range U+4E00..U+9FA5 are kept.

    Args:
        str1: Raw text of the name field.
        delete_contents: Fragments to strip.  Defaults to the module-level
            ``delete_list``.

    Returns:
        The cleaned name; an empty string when nothing usable remains.
    """
    if delete_contents is None:
        delete_contents = delete_list

    # Remove the longest fragments first.  Stripping "元" before "（万元）"
    # would leave "（万）" behind, and those leftover parentheses would then
    # make the raw, uncleaned string be returned below.
    for fragment in sorted(delete_contents, key=len, reverse=True):
        str1 = str1.replace(fragment, "")

    if "（" in str1 and "）" in str1:
        return str1
    return ''.join(re.findall('[\u4e00-\u9fa5]', str1))

def get_score(float_num):
    """Turn a raw price string into a log-scaled relation score.

    The first number found in *float_num* is parsed and mapped to
    ``10.04 + 4.34 * ln(price)``.

    Args:
        float_num: Price text such as ``"1,234.50"`` or ``"100元"``.  Values
            that are not strings are converted with ``str()``.

    Returns:
        The score, or ``1`` when no positive number can be parsed or when the
        score falls inside ``[-2, 2]``.
    """
    if float_num is None:
        return 1

    # Drop whitespace and thousands separators only.  The decimal point must
    # survive: removing every non-word character turned "100.00" into 10000.
    cleaned = re.sub(r'[\s,，]', '', str(float_num))
    number = re.search(r'\d+(?:\.\d+)?', cleaned)
    if number is None:
        return 1

    try:
        score = 10.04 + 4.34 * math.log(float(number.group()))
    except (ValueError, OverflowError):
        # math.log() rejects 0; float() rejects digit forms it cannot parse.
        return 1

    if -2 <= score <= 2:
        return 1
    return score



def find_relation_byRegion(aim_region="('重庆')"):
    """Export the notice relations of *aim_region* to a CSV file.

    Selects call_unit, agent_unit_name, provide_unit and actual_price from
    the table named by the module-level ``notice_table_name`` through the
    module-level ``cursor_notice`` and writes the rows to
    ``xietongguolv/relations_<aim_region>.csv``.

    Args:
        aim_region: Parenthesised SQL list of region names that is inserted
            verbatim after ``region in``.  The exact value ``"('全国')"``
            disables the region filter.
    """
    print("Updating recommand system,please wait!")

    # The statement is assembled from a shared head, an optional region
    # filter and a shared tail.
    sql_pieces = [
        "select call_unit,agent_unit_name,provide_unit,actual_price from %s " % notice_table_name,
        "where call_unit != agent_unit_name AND locate('无', provide_unit )<=0",
    ]
    if aim_region != "('全国')":
        sql_pieces.append(" AND region in " + aim_region)
    sql_pieces.append(" AND locate('地址', provide_unit )<=0 AND actual_price != '' AND actual_price != '0.0';")
    sql = "".join(sql_pieces)

    cursor_notice.execute(sql)
    relations_table = cursor_notice.fetchall()

    output_dir = "xietongguolv"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    csv_path = "xietongguolv/relations" + "_" + aim_region + ".csv"
    with open(csv_path, "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(list(record) for record in relations_table)
    print("Done")
    #============================================================================================
    #开始预处理



def _print_progress(done, total, stage_offset):
    """Redraw the one-line progress bar for the module-level ``command``.

    Each stage owns 20% of the bar: *stage_offset* (0.0 .. 0.8) is where the
    stage starts and ``done / total`` is how far the stage has advanced.
    """
    percent = (stage_offset + 0.2 * done / total) * 100
    print("\r", end="")
    print(command + " progress: {:.2f}%: ".format(percent), "▋" * int(percent // 2), end="")


def _group_nonzero_by_row(matrix, stage_offset):
    """Return one list per row of *matrix* with the columns of its non-zero cells.

    Entries are bucketed by their row index, so the result does not depend on
    the order in which the sparse format enumerates its entries, and a row
    without non-zero cells still gets its own (empty) list.
    """
    rows, cols = matrix.nonzero()
    grouped = [[] for _ in range(matrix.shape[0])]
    total = len(rows)
    for done, (row, col) in enumerate(zip(rows.tolist(), cols.tolist()), start=1):
        _print_progress(done, total, stage_offset)
        grouped[row].append(col)
    return grouped


def get_simularity_matrix(aim_region="('重庆')"):
    """Build the buyer x agent score matrix and the buyer-to-buyer similarities.

    Reads ``xietongguolv/relations_<aim_region>.csv``.  Column ``buyer_col``
    is the "buyer" role, column ``agent_col`` the "agent" role (both are
    module-level values) and column 3 is the price passed to ``get_score``.
    Rows whose cleaned buyer or agent name is empty are skipped.

    Module-level state changed by this function:
        * ``relations`` receives one ``[buyer, agent, "", score]`` entry per row.
        * ``buyers_names`` / ``agents_names`` are extended and then rebound to
          sorted, de-duplicated lists; a name's position is its matrix index.
        * ``buyer_history[b]`` becomes the agent indices with a non-zero score
          for buyer ``b``; ``agent_history[a]`` the buyer indices for agent ``a``.

    The similarity rows are also written to
    ``xietongguolv/simularity_+<aim_region>.csv``.

    Args:
        aim_region: Region list text used as the suffix of both file names.

    Returns:
        A pair ``(buyers_sims, buyer_sims_buyers)``.  ``buyers_sims`` is a list
        of ``(index1, index2, similarity, name1, name2)`` sorted by ``index1``
        and then by descending similarity; the similarity of a buyer with
        itself is reported as 0.  ``buyer_sims_buyers[b]`` is the list of
        ``(index2, similarity)`` pairs of buyer ``b`` in that same order
        (an empty list when the buyer has none).
    """
    global buyers_names, agents_names, relations, buyer_history, agent_history

    print("reading .csv,please wait")
    # Only the parsing needs the file; it is closed before the heavy work starts.
    with open("xietongguolv/relations" + "_" + aim_region + ".csv", "r", encoding="utf-8") as f:
        for row in csv.reader(f):
            agent_name = getChinese(row[agent_col])
            if agent_name == "":
                continue
            buyer_name = getChinese(row[buyer_col])
            if buyer_name == "" or buyer_name is None or buyer_name == " ":
                continue
            buyers_names.append(buyer_name)
            agents_names.append(agent_name)
            relations.append([buyer_name, agent_name, "", get_score(row[3])])

    # Sorting makes the index assigned to each name reproducible between runs
    # (plain set iteration order is not).
    buyers_names = sorted(set(buyers_names))
    agents_names = sorted(set(agents_names))
    buyers_dict = {name: index for index, name in enumerate(buyers_names)}
    agents_dict = {name: index for index, name in enumerate(agents_names)}
    len_buyers = len(buyers_names)
    len_agents = len(agents_names)

    print("ceating matrix :" + str(len_buyers) + " x " + str(len_agents))
    # Kept from the original; presumably a pause so the size line can be read
    # before the progress bar starts overwriting the console line.
    time.sleep(3)

    # Stage 1 (0-20%): translate every relation into matrix coordinates.
    buyers_indexs = []
    agents_indexs = []
    scores_indexs = []
    len_relations = len(relations)
    for done, relation in enumerate(relations, start=1):
        _print_progress(done, len_relations, 0.0)
        buyers_indexs.append(buyers_dict[relation[0]])
        agents_indexs.append(agents_dict[relation[1]])
        scores_indexs.append(relation[3])

    b_a = scipy.sparse.coo_matrix((scores_indexs, (buyers_indexs, agents_indexs)),
                                  shape=(len_buyers, len_agents))
    matrix_b_a = b_a.tocsr()

    # Stages 2 and 3 (20-60%): who dealt with whom.  The transpose of a CSR
    # matrix is CSC and lists its entries column by column, so the entries of
    # one agent are not adjacent; grouping by row index instead of by
    # "same as the next entry" keeps agent_history correct and aligned.
    buyer_history = _group_nonzero_by_row(matrix_b_a, 0.2)
    agent_history = _group_nonzero_by_row(matrix_b_a.transpose(), 0.4)

    # Read coordinates and values from the same COO view so they cannot get
    # out of step when the sparse product stores an explicit zero.
    sim_matrix = cosine_similarity(matrix_b_a, dense_output=False).tocoo()
    buyers_sims = []
    for index1, index2, value in zip(sim_matrix.row.tolist(),
                                     sim_matrix.col.tolist(),
                                     sim_matrix.data.tolist()):
        if value == 0:
            continue
        similarity = 0 if index1 == index2 else value
        buyers_sims.append((index1, index2, similarity,
                            buyers_names[index1], buyers_names[index2]))

    buyers_sims.sort(key=lambda x: (x[0], -x[2]))
    # NOTE(review): the "+" in the file name looks accidental; it is kept so
    # anything that already reads this file still finds it.
    with open("xietongguolv/simularity_+" + aim_region + ".csv", "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(buyers_sims)

    # Stage 4 (60-80%): per-buyer list of (other buyer, similarity).  Every
    # slot is a list so callers can concatenate them without type errors.
    buyer_sims_buyers = [[] for _ in buyers_names]
    len_buyers_sims = len(buyers_sims)
    for done, sim in enumerate(buyers_sims, start=1):
        _print_progress(done, len_buyers_sims, 0.6)
        buyer_sims_buyers[sim[0]].append((sim[1], sim[2]))

    return buyers_sims, buyer_sims_buyers

def get_agent_buyers(agent_index):
    """Return the entry stored at *agent_index* in the module-level ``agent_history``."""
    return agent_history[agent_index]



def get_buyer_agents(buyer_index):
    """Return the entry stored at *buyer_index* in the module-level ``buyer_history``."""
    agents_of_buyer = buyer_history[buyer_index]
    return agents_of_buyer

def get_simular_buyer_for_agent(buyer_index):
    """Return the entry stored at *buyer_index* in the module-level ``buyer_sims_buyers``."""
    similar_buyers = buyer_sims_buyers[buyer_index]
    return similar_buyers

def list_contant(list1,list2):
    """Return ``list1 + list2``; written as a two-argument function for use with ``reduce``."""
    joined = list1 + list2
    return joined

def textDistance(str1,str2):
    """Return how much of *str1* is covered by characters shared with *str2*.

    The numerator is the number of distinct characters that occur in both
    strings; the denominator is ``len(str1)``, so repeated characters in
    *str1* lower the ratio.

    Args:
        str1: Reference text.
        str2: Text compared against *str1*.

    Returns:
        The ratio as a float; ``0.0`` when *str1* is empty (this used to raise
        ``ZeroDivisionError``).
    """
    if not str1:
        return 0.0
    shared = set(str1) & set(str2)
    return len(shared) / len(str1)

def tupleToJsonObject(tuple):
    """Convert an ``(id, score, name)`` sequence into a recommendation dict.

    The keys are inserted in the order ``pid``, ``name``, ``score``.
    """
    return {
        "pid": tuple[0],
        "name": tuple[2],
        "score": tuple[1],
    }


# Read Controller.config: for every line keep the field that follows the
# first ":" (up to a second ":" if there is one), without the trailing
# newline.  Lines without any ":" are reported and skipped.
ii = []
with open("Controller.config","r",encoding="utf-8") as f:
    controller_Data = f.readlines()
    for config_line in controller_Data:
        fields = config_line.split(":")
        try:
            raw_value = fields[1]
        except Exception as e:
            print(e)
            print("wrong parameter:"+config_line)
        else:
            ii.append(raw_value.rstrip("\n"))




# Unpack the positional values read from Controller.config into named
# settings.  The position inside ``ii`` (i.e. the line order of the config
# file) decides which setting a value becomes.  Any missing entry or
# non-integer port/topK prints the error and stops the script.
try:
    contorller_id = ii[0]
    operater = ii[1]
    # Connection settings of the database that holds the notice table.
    input_ip = ii[2]
    input_port = int(ii[3])
    input_user = ii[4]
    input_password = ii[5]
    notice_database = ii[6]
    notice_table_name = ii[7]
    # Read here as strings; buyer_col and agent_col are overwritten with
    # integer column positions for every command in the main loop below.
    buyer_col = ii[8]
    agent_col = ii[9]
    provider_col = ii[10]
    price_col = ii[11]
    region_col = ii[12]
    # Comma-separated region names.
    regions = ii[13].split(",")
    # regions = ["全国"]
    # Render the regions as a parenthesised, quoted list, e.g. "('a', 'b')";
    # the text is later inserted after "region in" in the SQL statement and
    # used as part of the output file names.
    regions_name_list = str(tuple(regions)).replace('"', "")
    # A one-element tuple is rendered as "('a',)"; drop the trailing comma.
    if ",)" in regions_name_list:
        regions_name_list = regions_name_list.replace(",)", ")")

    # Number of recommendations kept per entry.
    topK = int(ii[14])
    # Compared with "是" later: when equal, the relations CSV is refreshed
    # from the database before the matrix is built.
    Update = ii[15]

    # Connection settings of the database that receives the results.
    result_ip = ii[16]
    result_port = int(ii[17])
    result_user = ii[18]
    result_password = ii[19]
    result_database = ii[20]
    result_table_name = ii[21]
except Exception as e:
    print(e)
    print("input parameters wrong!")
    # NOTE(review): sys.exit() without an argument ends the process with exit
    # status 0, so a calling script cannot detect this failure from the status.
    sys.exit()


# Main pipeline: connect to both databases, then for each of the four
# recommendation directions build the similarity data, derive the top-K
# recommendations per entity, write them to a CSV file and insert them into
# the result table.
#
# Both connections start as None so the ``finally`` clause can tell which
# ones were actually opened and close exactly those.
conn0 = None
conn1 = None
try:
    # Connect to the notice (input) database.
    try:
        conn0 = pymysql.connect(host=input_ip,  # 192.168.2.20
                                port=input_port,
                                user=input_user,
                                password=input_password,
                                database=notice_database,
                                charset="utf8")
        cursor_notice = conn0.cursor()  # cursor used to execute SQL statements
    except Exception as e:
        # Not fatal by itself: this database is only queried when
        # Update == "是"; otherwise the previously exported CSV is reused.
        print(e)

        print(
            "Failed to reach database：" + notice_database + ",check input parameters or network connection or database state.")

    # Connect to the result database and look up the highest id stored so far.
    try:
        conn1 = pymysql.connect(host=result_ip,  # 192.168.2.20
                                port=result_port,
                                user=result_user,
                                password=result_password,
                                database=result_database,
                                charset="utf8")
        cursor_result = conn1.cursor()  # cursor used to execute SQL statements
        sql_findMax = "SELECT MAX(result_id) FROM "+result_table_name
        cursor_result.execute(sql_findMax)
        maxid = cursor_result.fetchall()[0][0]
        if maxid is None:
            maxid = 0
    except Exception as e:
        print(e)
        print(
            "Failed to reach database：" + result_database + ",check input parameters or network connection or database state.")
        # Nothing can be stored without this database.  Re-raise so the outer
        # handler reports the real cause instead of a NameError on ``maxid``.
        raise

    # command -> (buyer_col, agent_col, character).  The two numbers are the
    # CSV columns that play the "buyer" and "agent" role for that command.
    command_settings = {
        "GiveProvidersToBuyers": (2, 0, "采购方"),
        "GiveBuyersToProviders": (0, 2, "供应商"),
        "GiveAgentsToBuyers": (1, 0, "采购方"),
        "GiveBuyersToAgents": (0, 1, "代理机构"),
    }
    commands = list(command_settings)
    result_id = maxid+1

    region = regions_name_list
    for command in commands:
        buyer_col, agent_col, character = command_settings[command]

        # Reset the module-level state that get_simularity_matrix() fills.
        relations = []
        relations_dict = {}
        buyers_names = []
        agents_names = []
        providers_names = []
        buyer_history = []
        agent_history = []
        if Update=="是":
            find_relation_byRegion(region)
        buyers_sims,buyer_sims_buyers = get_simularity_matrix(region )

        # Start recommending.
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        results = []
        now_forFile = now.replace(":","_").replace(" ","_")
        os.makedirs("xietongguolv/results", exist_ok=True)
        with open("xietongguolv/results/"+command+"_in_"+region+"_"+now_forFile+".csv","w",encoding="utf-8",newline="") as f:
            writer = csv.writer(f)
            len_agents_names = len(agents_names)
            for i in range(len_agents_names):
                # Look up the buyers this agent already worked with and
                # recommend buyers that are similar to them.
                process = i + 1
                print("\r", end="")
                print(command+" progress: {:.2f}%: ".format(((process / len_agents_names)*0.2+0.8) * 100),
                      "▋" * int((process * 100 // len_agents_names)*0.2 // 2+40), end="")
                agents_buyers = get_agent_buyers(i)
                known_buyers = set(agents_buyers)  # O(1) membership tests below
                agent_log = str(list(map(lambda x: buyers_names[x], agents_buyers))).replace("[", "").replace("]",
                                                                                                              "").replace(
                    "'", "")

                # Pool the (buyer, similarity) pairs of every known buyer.
                # Unlike reduce() without an initial value this also works
                # for an agent without buyers and for empty-tuple slots.
                recommand = [pair
                             for known_buyer in agents_buyers
                             for pair in get_simular_buyer_for_agent(known_buyer)]
                recommand.sort(key=lambda x:x[0])

                # Sum the similarities per candidate, skipping existing
                # customers.  Insertion order follows the sort above, so ties
                # keep their ascending-index order through the stable sort.
                score_by_code = {}
                for code, similarity in recommand:
                    if code in known_buyers:
                        continue
                    score_by_code[code] = score_by_code.get(code, 0) + similarity
                # Plain int/float keep NumPy scalar reprs such as
                # "np.int64(3)" out of the stringified result.
                code_value = [(int(code), float(value), buyers_names[code])
                              for code, value in score_by_code.items()]

                re_type = "成功推荐的"
                # The agent has buyers but none of them has similar buyers:
                # fall back to random picks ranked by text overlap.
                if len(code_value) == 0:
                    re_type = "随机赋值的"
                    # randint() excludes its upper bound; passing
                    # len(buyers_names) makes the last buyer eligible and
                    # avoids a ValueError when there is a single buyer.
                    random_indexs = np.random.randint(0, len(buyers_names),
                                                      int(len(buyers_names)*0.001)).tolist()
                    random_buyers = [buyers_names[x] for x in random_indexs]
                    random_sims = [textDistance(agent_log, x) if agent_log else 0.0
                                   for x in random_buyers]
                    code_value = list(zip(random_indexs,random_sims,random_buyers))

                code_value.sort(key=lambda x: x[1], reverse=True)

                recommand_give = code_value[:topK]
                recommand_list = list(map(tupleToJsonObject,recommand_give))
                recommand_JSON = {"Objects":recommand_list}
                # NOTE(review): str() produces a Python dict repr (single
                # quotes), not strict JSON; kept because consumers of the
                # result table may depend on the stored format.
                recommand_str = str(recommand_JSON)

                results.append((result_id,command,character,agents_names[i],recommand_str,re_type,region,now,contorller_id))
                result_id+=1

                writer.writerow([i,agents_names[i],recommand_give,re_type,region,now,contorller_id])

        print("\n")
        sql_result = 'insert into ' + result_table_name + ' values(%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        cursor_result.executemany(sql_result,results)
        conn1.commit()

    print("finished!!!")
except Exception as e:
    print("推荐失败")
    print(e)
finally:
    # Release whichever database connections were opened.
    for connection in (conn0, conn1):
        if connection is not None:
            connection.close()


